# -*- coding: utf-8 -*-
import scrapy
import codecs
from scrapy.crawler import CrawlerProcess
import sys
sys.path.insert(0,'../../')
from tutorial.items import *

class JokeSpider(scrapy.Spider):
    """Scrape joke text from a fixed set of jokeji.cn pages.

    Each page's jokes live in <p> elements under the element with
    id="text110"; every paragraph's full text becomes one item.
    """

    name = 'joke'
    allowed_domains = ['jokeji.cn']
    start_urls = [
        "http://www.jokeji.cn/jokehtml/ert/2017092523515651.htm",
        "http://www.jokeji.cn/jokehtml/bxnn/2017092414391740.htm",
        "http://www.jokeji.cn/jokehtml/zh/20090319162120.htm",
        "http://www.jokeji.cn/jokehtml/bxqm/2017092414350783.htm"
    ]

    def parse(self, response):
        """Yield one TutorialItem per joke paragraph on the page."""
        # string(.) flattens each <p> (including nested markup) to plain text.
        paragraphs = response.xpath("//*[@id='text110']/p").xpath("string(.)").extract()

        for text in paragraphs:
            joke = TutorialItem()
            joke['content'] = text
            yield joke

if __name__ == '__main__':
    # Allow running this spider standalone, without the `scrapy crawl` CLI.
    crawler = CrawlerProcess()
    crawler.crawl(JokeSpider)
    crawler.start()  # blocks until the crawl finishes
